1 Introduction

This module extracts and describes the Census data to be used for Model Evaluation.

The raw data files were downloaded in geojson format from the American Community Survey 2019 - 5 year survey. The ultimate data source is the US Census Bureau. The data distributor is censusreporter.org

library(tidyverse)
library(kableExtra)
library(sf)
library(stars)
library(ggplot2)
library(ggspatial)
library(viridis)
library(leaflet)
library(classInt)
library(RColorBrewer)

2 City Limits

Let’s load the Raleigh Corporate Limits. By superimposing the city limits with census data, we can verify that the city is fully covered by the latter.

raleigh_corporate_limits_file = paste0(data_dir, "Corporate_Limits.geojson")


# Load the geojson files
# change the coordinate system to 2264
# filter objects that belong to RALEIGHT
# --------------------------------------------
gj_raleigh <- sf::st_read(raleigh_corporate_limits_file, quiet = TRUE) %>% 
  st_transform(2264) %>% 
  filter( SHORT_NAME == 'RAL' )

3 Poverty Status (B17017)

Load the poverty geojson census data.

pov_root = "acs2019_5yr_B17017_15000US371830529022"

poverty_B17017_raw = st_read( paste0(data_dir, pov_root, "/", pov_root, ".geojson" )
                                     , quiet = TRUE ) %>% st_transform(2264)

Extract the regional average poverty rate.

regional_avg_poverty_rate = poverty_B17017_raw %>% 
  filter( geoid == "31000US39580") %>% 
  mutate( rate = B17017002 / B17017001)  %>% 
  pull(rate)

Impute any missing poverty rate regions with the median rate for the entire Raleigh-Cary Metro Area.

poverty_B17017_raw %>% 
  select( geoid, name, B17017001, B17017002 ) %>% 
  filter( name  != 'Raleigh-Cary, NC Metro Area')  %>% 
  mutate( poverty_rate = ifelse( B17017001 == 0, regional_avg_poverty_rate , B17017002 / B17017001) )  %>% 
  mutate( area = st_area(.)) -> gj_poverty
pal_fun = colorQuantile("Spectral", NULL , n = 6)

gj_poverty_leaf = st_transform(gj_poverty, 4326)

p_popup <- paste0("Poverty: ", format(round(gj_poverty$poverty_rate, 3), nsmall = 3 ), "<br>" ,
                  "Name: ", gj_poverty$name , "<br>" ,
                  "Geoid: ", gj_poverty$geoid, "<br>" )
                  

# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_poverty$poverty_rate) - .00001, 
                              gj_poverty$poverty_rate), n = 6, style = "quantile")

breaks_qt
## style: quantile
##     [-1e-05,0.02073365) [0.02073365,0.04442344) [0.04442344,0.07557252) 
##                     104                     104                     104 
##  [0.07557252,0.1113514)   [0.1113514,0.1808219)   [0.1808219,0.6422414] 
##                     104                     104                     105
leaflet( gj_poverty_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
                                                 fillColor = ~pal_fun(poverty_rate),  # set fill color with function from above and value
                                                 fillOpacity = 0.5 ,
                                                 smoothFactor = 0.5, # make it nicer
                                                 popup = p_popup
                                                 ) %>% addTiles() %>%
       addLegend( "bottomright", # Location
                    colors = brewer.pal(6, "Spectral") ,
                  labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
                  title = "Raleigh Poverty rate by Census Block Group"
                  )

4 Unemployment (B23025)

Table B23025 contains Employment Status for the Population 16 years and over. The relevant field is Unemployed % from the civilian labor force.

unemp_root = "acs2019_5yr_B23025_15000US371830529022"

unemp_B23025_raw = st_read( paste0(data_dir, unemp_root, "/", unemp_root, ".geojson" )
                                     , quiet = TRUE ) %>% st_transform(2264)

Extract the regional average unemployment rate for imputation of regions with undefined unemployment rates.

regional_avg_unemp_rate  = unemp_B23025_raw %>% 
  filter( geoid == "31000US39580") %>% 
  mutate( rate = B23025005 / B23025001)  %>% 
  pull( rate )

Impute any missing poverty rate regions with the median rate for the entire Raleigh-Cary Metro Area.

unemp_B23025_raw %>% 
  select( geoid, name, B23025005, B23025001 ) %>% 
  filter( name  != 'Raleigh-Cary, NC Metro Area')  %>% 
  mutate( unemployment_rate = ifelse( B23025001  == 0, regional_avg_unemp_rate , B23025005 / B23025001) )  %>% 
  mutate( area = st_area(.)) -> gj_unemp
gj_unemp_leaf = st_transform(gj_unemp, 4326)

p_popup <- paste0("Unemp: ", format(round(100 * gj_unemp$unemployment_rate, 1), nsmall = 1 ), "<br>" ,
                  "Name: ", gj_unemp$name , "<br>" ,
                  "Geoid: ", gj_unemp$geoid, "<br>" )
                  

# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_unemp$unemployment_rate) - .00001, 
                              gj_unemp$unemployment_rate), n = 6, style = "quantile")

breaks_qt
## style: quantile
##     [-1e-05,0.002762431) [0.002762431,0.01464605)  [0.01464605,0.02296588) 
##                      104                      103                      105 
##  [0.02296588,0.03476483)  [0.03476483,0.05166586)   [0.05166586,0.1609695] 
##                      104                      104                      105
leaflet( gj_unemp_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
                                                 fillColor = ~pal_fun(unemployment_rate),  # set fill color with function from above and value
                                                 fillOpacity = 0.5 ,
                                                 smoothFactor = 0.5, # make it nicer
                                                 popup = p_popup
                                                 ) %>% addTiles() %>%
       addLegend( "bottomright", # Location
                    colors = brewer.pal(6, "Spectral") ,
                  labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
                  title = "Raleigh Unemployment By Block Group"
                  )
ggplot() + geom_sf(data=gj_unemp , aes( fill = unemployment_rate  )  ) +
  scale_fill_viridis_c( alpha = .4) +
  annotation_map_tile(type="osm", progress = "none", alpha = 0.5 ) 

5 Household Income (B19013)

Table B19013 contains Median Household Income . The relevant field is Unemployed % from the civilian labor force.

income_root = "acs2019_5yr_B19013_15000US371830523011"

income_B19013_raw = st_read( paste0(data_dir, income_root, "/", income_root, ".geojson" )
                                     , quiet = TRUE ) %>% st_transform(2264)

Extract the regional average unemployment rate for imputation of regions with undefined unemployment rates.

regional_avg_income  = 67266  # Median household income for Raleigh city in 2019 inflated adjusted dollars

Impute any missing median household income with the regional median household income for the entire Raleigh-Cary Metro Area.

income_B19013_raw %>%  select( geoid, name, B19013001 ) %>% 
 filter( str_length(geoid) == 19 ) %>%  # Filter out the higher level aggregate regions shown by shorter geoid like Cary, NC, Raleigh, NC, etc.
 mutate( income = ifelse( is.na(B19013001) | B19013001  == 0, regional_avg_income , B19013001) )  %>% 
 mutate( area = st_area(.)) -> gj_income
gj_income_leaf = st_transform(gj_income, 4326)

p_popup <- paste0("Income: $", format(round( gj_income$income, 0), nsmall = 0 ), "<br>" ,
                  "Name: ", gj_income$name , "<br>" ,
                  "Geoid: ", gj_income$geoid, "<br>" )
                  

# get quantile breaks. Add .00001 offset to catch the lowest value
breaks_qt <- classIntervals(c(min(gj_income$income) - .00001, 
                              gj_income$income), n = 6, style = "quantile")

breaks_qt
## style: quantile
##    [22000,41870.83) [41870.83,56457.33)    [56457.33,67266)    [67266,81388.33) 
##                  41                  41                  36                  46 
## [81388.33,100843.3)   [100843.3,232955] 
##                  41                  41
leaflet( gj_income_leaf ) %>% addPolygons( stroke = FALSE, # Remove polygon borders
                                                 fillColor = ~pal_fun(income),  # set fill color with function from above and value
                                                 fillOpacity = 0.5 ,
                                                 smoothFactor = 0.5, # make it nicer
                                                 popup = p_popup
                                                 ) %>% addTiles() %>%
       addLegend( "bottomright", # Location
                    colors = brewer.pal(6, "Spectral") ,
                  labels = paste0("up to ", format(breaks_qt$brks[-1], digits = 2)) ,
                  title = "Raleigh Median Household Income"
                  )